#***************************************************************#
#                                                               #
#                                                               #
#                 REPLICATION FILES FOR:                        #
#                                                               #
#   Voluntary adoption of social welfare-enhancing behavior     #
#     Mask-wearing in Spain during the COVID-19 outbreak        #                           
#                                                               #
#                                                               #          
#                                                               #
#                                                               #
#***************************************************************#


#***************#
# Load Packages #
#***************#

library(rstudioapi)
library(readstata13)
library(pmdplyr)
library(Amelia)
library(randomizr)
library(ggplot2)
library(dplyr)
library(plotly)
library(hrbrthemes)
library(qualtRics)
library(Hmisc)
library(MASS)
library(foreign)
library(tidyverse)
library(reporttools)
library(haven)
library(lmtest)
library(sandwich)


#***********************#
# Set Working Directory #
#***********************#

current_path <- getActiveDocumentContext()$path 
setwd(dirname(current_path ))

#****************************#
#Figure 2: Research Context  #
#****************************#

covid <- as.data.frame(read.csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv",
                                sep = ",", dec='.', header = TRUE, stringsAsFactors = FALSE))

spain_covid <- covid[covid$location == "Spain" & covid$date > "2020-02-15" & covid$date < "2020-04-30",]
plot(as.Date(spain_covid$date), spain_covid$new_cases_smoothed)

# plot
spain_plot <- spain_covid %>% 
  ggplot( aes(x=as.Date(date), y=new_cases_smoothed)) +
  geom_line(color="#69b3a2") +
  ylim(0,8000) +
  xlab("Date") + ylab("Number of new cases (smoothed)") +
  annotate(geom="text", x=as.Date("2020-03-11"), y=4030, 
           label="Stay-at-home \n order begins", size = 3) +
  geom_vline(xintercept=as.Date("2020-03-17"), color="orange", size=.5) +
  annotate(geom="text", x=as.Date("2020-03-28"), y=4000, 
           label="Fieldwork \nperiod", size = 3) +
  geom_vline(xintercept=as.Date("2020-03-24"), color="red", size=.5, linetype = 2) +
  geom_vline(xintercept=as.Date("2020-04-02"), color="red", size=.5, linetype = 2) +
  #geom_rect(aes(xmin=as.Date("2020-03-24"), xmax=as.Date("2020-04-02"), ymin=0, ymax=8000),
  #          color = "red", fill = "red", alpha = 0.5) +
  annotate("rect", xmin = as.Date("2020-03-24"), xmax = as.Date("2020-04-02"), ymin = 0, ymax = 8000,
           alpha = .1) +
  theme_ipsum()

png(filename="spain_plot", 
    units="in", 
    width=9, 
    height=6, 
    pointsize=15, 
    res=1000)
spain_plot
dev.off()

#****************************#
#      Load Survey Data      #
#****************************#

covid19_survey2 <- read.dta13("replication_data_final.dta", convert.factors=T)

covid19_survey2$date <- substr(as.character(covid19_survey2$EndDate), start = 9, stop = 10)

colnames(covid19_survey2)[(colnames(covid19_survey2) == 'region')] <- 'region_code'

region_infect <- read.csv2("region_infect_rate.csv", sep = ",")[, c('region_code', "reg_infect_date", 'reg_cases_pc', 'reg_death_pc')]
region_infect$region_code <- as.numeric(as.character(region_infect$region_code) )
region_infect$date <- substr(str_pad(region_infect$reg_infect_date, 9, pad = "0"), start = 1, stop = 2)
region_infect$reg_cases_pc <- as.numeric(as.character(region_infect$reg_cases_pc))
region_infect$reg_deaths_pc <- as.numeric(as.character(region_infect$reg_death_pc))

covid.tmp <- merge(covid19_survey2, region_infect, by=c("region_code", 'date'), all.x = TRUE, all.y = FALSE)

prov_infect <- read.csv2("prov_infect_rate.csv", sep = ",")[, c('province_code', "prov_infect_date", 'prov_cases_pc', 'prov_death_pc')]
prov_infect$province_code <- as.character(str_pad(prov_infect$province_code, 2, pad = "0")) 
prov_infect$date <- substr(str_pad(prov_infect$prov_infect_date, 9, pad = "0"), start = 1, stop = 2)
prov_infect$prov_cases_pc <- as.numeric(as.character(prov_infect$prov_cases_pc))
prov_infect$prov_deaths_pc <- as.numeric(as.character(prov_infect$prov_death_pc))

covid19 <- merge(covid.tmp, prov_infect, by=c("province_code", "date"), all.x = TRUE)

###################################

(covid19 <- covid19 %>% #gender
    mutate(gender = case_when(
      covid19$Q2 == 1 ~ NA_real_,
      covid19$Q2 == 3 ~ 0, #male
      covid19$Q2 == 4 ~ 1  #female
    )))

(covid19 <- covid19 %>% #age_category
    mutate(age_cat = case_when(
      covid19$Q3 == 1 ~ 0,
      covid19$Q3 == 2 ~ 1,
      covid19$Q3 == 3 ~ 2,
      covid19$Q3 == 4 ~ 3,
      covid19$Q3 == 5 ~ 4,
      covid19$Q3 == 6 ~ 5
    )))

(covid19 <- covid19 %>% #age_category
    mutate(age_cat2 = case_when(
      covid19$Q3 == 1 ~ NA_real_,
      covid19$Q3 == 2 ~ 0,
      covid19$Q3 == 3 ~ 1,
      covid19$Q3 == 4 ~ 2,
      covid19$Q3 == 5 ~ 3,
      covid19$Q3 == 6 ~ 4
    )))

(covid19 <- covid19 %>% #education
    mutate(educ_cat = case_when(
      covid19$Q162 == 1 ~ 0,#secondary or less
      covid19$Q162 == 2 ~ 1,#high school
      covid19$Q162 == 3 ~ 2,#some college
      covid19$Q162 == 4 ~ 3,#college
      covid19$Q162 == 5 ~ 4,#graduate (MA)
      covid19$Q162 == 6 ~ 4#graduate (PhD)
    )))

(covid19 <- covid19 %>% #town size
    mutate(town_size = case_when(
      covid19$Q6 == 1 ~ 5,#capital
      covid19$Q6 == 2 ~ 4,#more100k
      covid19$Q6 == 3 ~ 3,#50to100k
      covid19$Q6 == 4 ~ 2,#20to50k
      covid19$Q6 == 5 ~ 1,#5to20k
      covid19$Q6 == 6 ~ 0#less than 5k
    )))

(covid19 <- covid19 %>% #occupation_category
    mutate(occup_cat = case_when(
      covid19$Q158 == 1 ~ 1,#Big business manager -> Small/Big business manager
      covid19$Q158 == 2 ~ 2,#Engineer
      covid19$Q158 == 3 ~ 7,#Doctor -> Medical Worker
      covid19$Q158 == 4 ~ 4,#Teacher/professor
      covid19$Q158 == 5 ~ 5,#Other qualified
      covid19$Q158 == 6 ~ 1,#Small business manager -> Small/Big business manager
      covid19$Q158 == 7 ~ 7,#Nurse -> Medical Worker
      covid19$Q158 == 8 ~ 8,#Middle skilled worker
      covid19$Q158 == 9 ~ 16,#Self-employed -> Others
      covid19$Q158 == 10 ~ 10,#Middle manager -> Supervisor/Middle Manager
      covid19$Q158 == 11 ~ 10,#Supervisor -> Supervisor/Middle Manager
      covid19$Q158 == 12 ~ 12,#Technician
      covid19$Q158 == 13 ~ 13,#Industrial worker -> Agriculture/Industrial Worker
      covid19$Q158 == 14 ~ 13,#Agriculture worker -> Agriculture/Industrial Worker
      covid19$Q158 == 15 ~ 15,#Unskilled worker
      covid19$Q158 == 16 ~ 16,#Unclassified -> Others
      covid19$Q158 == "" & covid19$age_cat == 5 ~ 17,#retired
      covid19$Q158 == "" & covid19$age_cat == 1 ~ 18,#student
      covid19$Q158 == "" ~ 16,#NAs -Others
    )))

(covid19 <- covid19 %>% #time_at_home
    mutate(time_home = case_when(
      covid19$Q24 == 1 ~ 3,#much more
      covid19$Q24 == 2 ~ 2,#more
      covid19$Q24 == 3 ~ 1,#a bit more
      covid19$Q24 == 4 ~ 0,#usual
    )))

(covid19 <- covid19 %>% #work_affect_covid
    mutate(work_affect = case_when(
      covid19$Q60 == 1 ~ 2,#erte
      covid19$Q60 == 2 ~ 3,#dismissed
      covid19$Q60 == 3 ~ 4,#telework
      covid19$Q60 == 4 & (covid19$occup_cat == 17 | covid19$occup_cat == 18) ~ 1,#inactive
      covid19$Q60 == 4 ~ 5,#unaffected
    )))


#****************************#
#Figure 3: Histogram, mask-wearing behavior
#****************************#

h = hist(covid19$facemask,plot=FALSE)
h$density = round(h$counts[h$counts!=0]/sum(h$counts)*100, 2)
temp_dat <- as.data.frame(cbind(c('Never', 'Rarely', 'Occasionally', 'Very frequently'), as.numeric(h$density)))

pdf("descr_facemask.pdf")
ggplot(temp_dat, aes(x = temp_dat[,1], y = as.numeric(as.character(temp_dat[,2])))) +
  geom_bar(stat='identity') +
  scale_x_discrete(limits=c('Never', 'Rarely', 'Occasionally', 'Very frequently')) +
  theme(axis.text.x = element_text(size=14)) +
  theme(axis.title.x = element_text(size = 14, margin = margin(t = 20, r = 0, b = 2, l = 0))) +
  theme(axis.title.y = element_text(size = 14, margin = margin(t = 0, r = 20, b = 2, l = 0))) +
  labs(title="", x="Mask-wearing behavior", y = "Percentage of respondents") +
  scale_y_continuous(breaks=c(seq(0,50,10)), limits = c(0,50)) +
  theme(panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
  panel.background = element_rect(fill = "white", colour = NA)) +
  annotate(geom="text", x=1, y=45, label="41%",
             color="black") +
  annotate(geom="text", x=2, y=14, label="10%",
             color="black") +
  annotate(geom="text", x=3, y=30, label="25.8%",
             color="black") +
  annotate(geom="text", x=4, y=27, label="23.1%",
             color="black")
dev.off()

#Table 1: Ordinal Logistic Regressions Investigating the Association Between Demographic Characteristics and Mask Use

summary(demo_ordinal <- polr(as.factor(facemask) ~ as.factor(gender) + as.factor(age_cat) + as.factor(educ_cat) + as.factor(occup_cat) + time_home + as.factor(work_affect), data = covid19, Hess=TRUE))

(sumtable <- coef(summary(demo_ordinal)))
p <- pnorm(abs(sumtable[, "t value"]), lower.tail = FALSE) * 2

(ci <- confint(demo_ordinal))

TABLE_1 <- cbind(round(exp(cbind(OR = coef(demo_ordinal), ci)), 2), p = round(p[1:length(coef(demo_ordinal))], 3))

print(TABLE_1)

#Table 2: The Association Between Risk Perceptions and Mask Use

(covid19 <- covid19 %>% #likely to get infected -> 4 = very likely
    mutate(likely_infect = case_when(
      covid19$Q52 == 4 ~ 0,
      covid19$Q52 == 3 ~ 1,
      covid19$Q52 == 5 ~ 2,
      covid19$Q52 == 2 ~ 3,
      covid19$Q52 == 1 ~ 4
    )))

(covid19 <- covid19 %>% #likely to have it already -> 4 = very likely
    mutate(likely_infected = case_when(
      covid19$Q43 == 6 ~ 2, #DK assigned to neither like nor unlike
      covid19$Q43 == 5 ~ 0,
      covid19$Q43 == 4 ~ 1,
      covid19$Q43 == 3 ~ 2,
      covid19$Q43 == 2 ~ 3,
      covid19$Q43 == 1 ~ 4
    )))

(covid19 <- covid19 %>% #trust health system -> 4 = trust
    mutate(trust_healthsystem = case_when(
      covid19$Q56 == 4 ~ 0,
      covid19$Q56 == 3 ~ 1,
      covid19$Q56 == 2 ~ 2,
      covid19$Q56 == 1 ~ 3
    )))

(covid19 <- covid19 %>% #concerned infected -> 4 = very concerned
    mutate(anxiety_covid = case_when(
      covid19$Q53 == 4 ~ 0,
      covid19$Q53 == 3 ~ 1,
      covid19$Q53 == 2 ~ 2,
      covid19$Q53 == 1 ~ 3
    )))

summary(table2_model1 <- polr(as.factor(facemask) ~ 
                               as.factor(likely_infected) +
                               as.factor(anxiety_covid) + 
                               as.factor(likely_infect) +
                               as.factor(trust_healthsystem), data = covid19, Hess=TRUE))

summary(table2_model2 <- polr(as.factor(facemask) ~ 
                               as.factor(likely_infected) +
                               as.factor(anxiety_covid) + 
                               as.factor(likely_infect) +
                               as.factor(trust_healthsystem) +
                               as.factor(gender) + as.factor(age_cat) + as.factor(educ_cat) + as.factor(occup_cat) + time_home + as.factor(work_affect), data = covid19, Hess=TRUE))

(table2_column1_coef <- coef(summary(table2_model1)))
table2_column1_p <- pnorm(abs(table2_column1_coef[, "t value"]), lower.tail = FALSE) * 2

(table2_column1_cint <- confint(table2_model1))

(table2_column2_coef <- coef(summary(table2_model2)))
table2_column2_p <- pnorm(abs(table2_column2_coef[, "t value"]), lower.tail = FALSE) * 2

(table2_column2_cint <- confint(table2_model2))

table2_column1 <- cbind(round(exp(cbind(OR = coef(table2_model1), table2_column1_cint)), 2), priska = round(table2_column1_p[1:length(coef(table2_model1))], 2))
table2_column2 <- cbind(round(exp(cbind(OR = coef(table2_model2), table2_column2_cint)), 2), priskb = round(table2_column2_p[1:length(coef(table2_model2))], 2))

TABLE_2 <- cbind(table2_column1, table2_column2[1:nrow(table2_column1),])

print(TABLE_2)

#Table 3: The Association Between Personality Traits and Mask Use

(covid19 <- covid19 %>%
    mutate(extr1 = case_when(
      as.numeric(covid19$Q154_1) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_1) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_1) == 3 ~ 2,
      as.numeric(covid19$Q154_1) == 4 ~ 3,
      as.numeric(covid19$Q154_1) == 5 ~ 4,
      as.numeric(covid19$Q154_1) == 6 ~ 5,
      as.numeric(covid19$Q154_1) == 7 ~ 6,
      as.numeric(covid19$Q154_1) == 8 ~ 0,
      as.numeric(covid19$Q154_1) == 9 ~ 1,
      as.numeric(covid19$Q154_1) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(agree1 = case_when(
      as.numeric(covid19$Q154_2) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_2) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_2) == 3 ~ 4,
      as.numeric(covid19$Q154_2) == 4 ~ 3,
      as.numeric(covid19$Q154_2) == 5 ~ 2,
      as.numeric(covid19$Q154_2) == 6 ~ 1,
      as.numeric(covid19$Q154_2) == 7 ~ 0,
      as.numeric(covid19$Q154_2) == 8 ~ 6,
      as.numeric(covid19$Q154_2) == 9 ~ 5,
      as.numeric(covid19$Q154_2) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(consc1 = case_when(
      as.numeric(covid19$Q154_3) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_3) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_3) == 3 ~ 2,
      as.numeric(covid19$Q154_3) == 4 ~ 3,
      as.numeric(covid19$Q154_3) == 5 ~ 4,
      as.numeric(covid19$Q154_3) == 6 ~ 5,
      as.numeric(covid19$Q154_3) == 7 ~ 6,
      as.numeric(covid19$Q154_3) == 8 ~ 0,
      as.numeric(covid19$Q154_3) == 9 ~ 1,
      as.numeric(covid19$Q154_3) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(neur1 = case_when(
      as.numeric(covid19$Q154_4) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_4) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_4) == 3 ~ 2,
      as.numeric(covid19$Q154_4) == 4 ~ 3,
      as.numeric(covid19$Q154_4) == 5 ~ 4,
      as.numeric(covid19$Q154_4) == 6 ~ 5,
      as.numeric(covid19$Q154_4) == 7 ~ 6,
      as.numeric(covid19$Q154_4) == 8 ~ 0,
      as.numeric(covid19$Q154_4) == 9 ~ 1,
      as.numeric(covid19$Q154_4) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(open1 = case_when(
      as.numeric(covid19$Q154_5) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_5) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_5) == 3 ~ 2,
      as.numeric(covid19$Q154_5) == 4 ~ 3,
      as.numeric(covid19$Q154_5) == 5 ~ 4,
      as.numeric(covid19$Q154_5) == 6 ~ 5,
      as.numeric(covid19$Q154_5) == 7 ~ 6,
      as.numeric(covid19$Q154_5) == 8 ~ 0,
      as.numeric(covid19$Q154_5) == 9 ~ 1,
      as.numeric(covid19$Q154_5) == 10 ~ NA_real_
    )))


(covid19 <- covid19 %>%
    mutate(extr2 = case_when(
      as.numeric(covid19$Q154_6) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_6) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_6) == 3 ~ 4,
      as.numeric(covid19$Q154_6) == 4 ~ 3,
      as.numeric(covid19$Q154_6) == 5 ~ 2,
      as.numeric(covid19$Q154_6) == 6 ~ 1,
      as.numeric(covid19$Q154_6) == 7 ~ 0,
      as.numeric(covid19$Q154_6) == 8 ~ 6,
      as.numeric(covid19$Q154_6) == 9 ~ 5,
      as.numeric(covid19$Q154_6) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(agree2 = case_when(
      as.numeric(covid19$Q154_7) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_7) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_7) == 3 ~ 2,
      as.numeric(covid19$Q154_7) == 4 ~ 3,
      as.numeric(covid19$Q154_7) == 5 ~ 4,
      as.numeric(covid19$Q154_7) == 6 ~ 5,
      as.numeric(covid19$Q154_7) == 7 ~ 6,
      as.numeric(covid19$Q154_7) == 8 ~ 0,
      as.numeric(covid19$Q154_7) == 9 ~ 1,
      as.numeric(covid19$Q154_7) == 10 ~ NA_real_
    )))


(covid19 <- covid19 %>%
    mutate(consc2 = case_when(
      as.numeric(covid19$Q154_8) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_8) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_8) == 3 ~ 4,
      as.numeric(covid19$Q154_8) == 4 ~ 3,
      as.numeric(covid19$Q154_8) == 5 ~ 2,
      as.numeric(covid19$Q154_8) == 6 ~ 1,
      as.numeric(covid19$Q154_8) == 7 ~ 0,
      as.numeric(covid19$Q154_8) == 8 ~ 6,
      as.numeric(covid19$Q154_8) == 9 ~ 5,
      as.numeric(covid19$Q154_8) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(neur2 = case_when(
      as.numeric(covid19$Q154_9) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_9) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_9) == 3 ~ 4,
      as.numeric(covid19$Q154_9) == 4 ~ 3,
      as.numeric(covid19$Q154_9) == 5 ~ 2,
      as.numeric(covid19$Q154_9) == 6 ~ 1,
      as.numeric(covid19$Q154_9) == 7 ~ 0,
      as.numeric(covid19$Q154_9) == 8 ~ 6,
      as.numeric(covid19$Q154_9) == 9 ~ 5,
      as.numeric(covid19$Q154_9) == 10 ~ NA_real_
    )))

(covid19 <- covid19 %>%
    mutate(open2 = case_when(
      as.numeric(covid19$Q154_10) == 1 ~ NA_real_,
      as.numeric(covid19$Q154_10) == 2 ~ NA_real_,
      as.numeric(covid19$Q154_10) == 3 ~ 4,
      as.numeric(covid19$Q154_10) == 4 ~ 3,
      as.numeric(covid19$Q154_10) == 5 ~ 2,
      as.numeric(covid19$Q154_10) == 6 ~ 1,
      as.numeric(covid19$Q154_10) == 7 ~ 0,
      as.numeric(covid19$Q154_10) == 8 ~ 6,
      as.numeric(covid19$Q154_10) == 9 ~ 5,
      as.numeric(covid19$Q154_10) == 10 ~ NA_real_
    )))


personality <- cbind(covid19$extr1, covid19$extr2, covid19$agree1, covid19$agree2, covid19$consc1, covid19$consc2, covid19$neur1, covid19$neur2, covid19$open1, covid19$open2)

covid19$extr <- covid19$extr1 + covid19$extr2
covid19$agree <- covid19$agree1 + covid19$agree2
covid19$consc <- covid19$consc1 + covid19$consc2
covid19$neur <- covid19$neur1 + covid19$neur2
covid19$open <- covid19$open1 + covid19$open2

summary(table3_model1 <- polr(as.factor(facemask) ~ extr + open + agree + consc + neur, data = covid19, Hess=TRUE))
summary(table3_model2 <- polr(as.factor(facemask) ~ extr + open + agree + consc + neur + as.factor(gender) + as.factor(age_cat) + as.factor(educ_cat) + as.factor(occup_cat) + time_home + as.factor(work_affect), data = covid19, Hess=TRUE))

(table3_column1_coef <- coef(summary(table3_model1)))
table3_column1_p <- pnorm(abs(table3_column1_coef[, "t value"]), lower.tail = FALSE) * 2
(table3_column1_ci <- confint(table3_model1))

(table3_column2_coef <- coef(summary(table3_model2)))
table3_column2_p <- pnorm(abs(table3_column2_coef[, "t value"]), lower.tail = FALSE) * 2
(table3_column2_ci <- confint(table3_model2))

table3_column1 <- cbind(round(exp(cbind(OR = coef(table3_model1), table3_column1_ci)), 2), ppersa = round(table3_column1_p[1:length(coef(table3_model1))], 2))
table3_column2 <- cbind(round(exp(cbind(OR = coef(table3_model2), table3_column2_ci)), 2), ppersb = round(table3_column2_p[1:length(coef(table3_model2))], 2))

TABLE_3 <- cbind(table3_column1, table3_column2[1:nrow(table3_column1),])

print(TABLE_3)

#Table 4: The Association Between Social Acceptability and Mask Use

# Panel A

summary(table4_panelA_model1 <- polr(as.factor(facemask) ~ reg_meanMINUSi + date + log(reg_cases_pc), data = covid19, Hess=TRUE))
coeftest(table4_panelA_model1, vcov=vcovCL(table4_panelA_model1, factor(covid19$region_code)))

summary(table4_panelA_model2 <- polr(as.factor(facemask) ~ reg_meanMINUSi + log(reg_cases_pc) + date + as.factor(gender) + as.factor(age_cat) + 
                               as.factor(educ_cat) + as.factor(occup_cat) + time_home + as.factor(work_affect), data = covid19, Hess=TRUE))
coeftest(table4_panelA_model2, vcov=vcovCL(table4_panelA_model2, factor(covid19$region_code)))

(table4_panelA_column1_coef <- coef(summary(table4_panelA_model1)))
table4_panelA_column1_p <- pnorm(abs(table4_panelA_column1_coef[, "t value"]), lower.tail = FALSE) * 2
(table4_panelA_column1_ci <- confint(table4_panelA_model1))

(table4_panelA_column2_coef <- coef(summary(table4_panelA_model2)))
table4_panelA_column2_p <- pnorm(abs(table4_panelA_column2_coef[, "t value"]), lower.tail = FALSE) * 2
(table4_panelA_column2_ci <- confint(table4_panelA_model2))

table4_PanelA_column1 <- cbind(round(exp(cbind(OR = coef(table4_panelA_model1), table4_panelA_column1_ci)), 2), prega = round(table4_panelA_column1_p[1:length(coef(table4_panelA_model1))], 2))
table4_PanelA_column2 <- cbind(round(exp(cbind(OR = coef(table4_panelA_model2), table4_panelA_column2_ci)), 2), pregb = round(table4_panelA_column2_p[1:length(coef(table4_panelA_model2))], 2))

TABLE_4_PANEL_A <- cbind(table4_PanelA_column1, table4_PanelA_column2[1:nrow(table4_PanelA_column1),])

#Panel B

summary(table4_panelB_model1 <- polr(as.factor(facemask) ~ prov_meanMINUSi + log(prov_cases_pc) + date, data = covid19, Hess=TRUE))
coeftest(table4_panelB_model1, vcov=vcovCL(table4_panelB_model1, factor(covid19$province_code) ))

summary(table4_panelB_model2 <- polr(as.factor(facemask) ~ prov_meanMINUSi + log(prov_cases_pc) + date + as.factor(gender) + as.factor(age_cat) +
                                as.factor(educ_cat) + as.factor(occup_cat) + time_home + as.factor(work_affect), data = covid19, Hess=TRUE))
coeftest(table4_panelB_model2, vcov=vcovCL(table4_panelB_model2, factor(covid19$province_code) ))

(table4_panelB_column1_coef <- coef(summary(table4_panelB_model1)))
table4_panelB_column1_p <- pnorm(abs(table4_panelB_column1_coef[, "t value"]), lower.tail = FALSE) * 2

(table4_panelB_column1_ci <- confint(table4_panelB_model1))

(table4_panelB_column2_coef <- coef(summary(table4_panelB_model2)))
table4_panelB_column2_p <- pnorm(abs(table4_panelB_column2_coef[, "t value"]), lower.tail = FALSE) * 2

(table4_panelB_column2_ci <- confint(table4_panelB_model2))

table4_PanelB_column1 <- cbind(round(exp(cbind(OR = coef(table4_panelB_model1), table4_panelB_column1_ci)), 2), pprovc = round(table4_panelB_column1_p[1:length(coef(table4_panelB_model1))], 2))
table4_PanelB_column2 <- cbind(round(exp(cbind(OR = coef(table4_panelB_model2), table4_panelB_column2_ci)), 2), pprovd = round(table4_panelB_column2_p[1:length(coef(table4_panelB_model2))], 2))

TABLE_4_PANEL_B <- cbind(table4_PanelB_column1, table4_PanelB_column2[1:nrow(table4_PanelB_column1),])

print(TABLE_4_PANEL_B)

#Table 5: The Association Between Demographic Characteristics and Mask Use by the Prevalence of Mask-wearing Behavior in the Province

covid19$low_prov_prev <- ifelse(covid19$prov_meanMINUSi < quantile(covid19$prov_meanMINUSi, 0.4, na.rm = T), 1, 0)
covid19$high_prov_prev <- ifelse(covid19$prov_meanMINUSi > quantile(covid19$prov_meanMINUSi, 0.6, na.rm = T), 1, 0)

covid19$gender <- as.factor(covid19$gender)
covid19$age_cat <- as.factor(covid19$age_cat)
covid19$educ_cat <- as.factor(covid19$educ_cat)
covid19$occup_cat <- as.factor(covid19$occup_cat)

summary(table5_column1 <- polr(`as.factor(facemask)` ~ (`as.factor(gender)` + `as.factor(age_cat)` + `as.factor(educ_cat)` + extr + open + agree + consc + neur +
                                                        `as.factor(occup_cat)` + `time_home` + `as.factor(work_affect)`) + `log(prov_cases_pc)` + date,
                            data = subset(demo_prev$model, demo_prev$model$prov_meanMINUSi < quantile(demo_prev$model$prov_meanMINUSi, 0.4, na.rm = T)),
                            Hess=TRUE))#model for low prevalent provinces

summary(table5_column2 <- polr(`as.factor(facemask)` ~ (`as.factor(gender)` + `as.factor(age_cat)` + `as.factor(educ_cat)` + extr + open + agree + consc + neur + 
                                                        `as.factor(occup_cat)`+ `time_home` + `as.factor(work_affect)` + extr + open + agree + consc + neur) + `log(prov_cases_pc)` + date,
                             data = subset(demo_prev$model, demo_prev$model$prov_meanMINUSi >= quantile(demo_prev$model$prov_meanMINUSi, 0.6, na.rm = T)),
                             Hess=TRUE))#model for high prevalent provinces

(table5_column1_coef <- coef(summary(table5_column1)))
table5_column1_p <- pnorm(abs(table5_column1_coef[, "t value"]), lower.tail = FALSE) * 2
(table5_column1_ci <- confint(table5_column1))

(table5_column2_coef <- coef(summary(table5_column2)))
table5_column2_p <- pnorm(abs(table5_column2_coef[, "t value"]), lower.tail = FALSE) * 2
(table5_column2_ci <- confint(table5_column2))

table5a <- cbind(round(exp(cbind(OR = coef(table5_column1), table5_column1_ci)), 2), pprova = round(table5_column1_p[1:length(coef(table5_column1))], 2))
table5b <- cbind(round(exp(cbind(OR = coef(table5_column2), table5_column2_ci)), 2), pprovb = round(table5_column2_p[1:length(coef(table5_column2))], 2))

TABLE_5 <-cbind(table5a[1:18,], table5b[1:18,])

print(TABLE_5)

#Table 5 Column "Diff P-Values"

mask_interact_data <- rbind(cbind(demo_lowprev$model, 'high' = 0), cbind(demo_highprev$model, 'high' = 1))

summary(demo_interact_prev <- polr(`as.factor(facemask)` ~ high*(`as.factor(gender)` + `as.factor(age_cat)` + `as.factor(educ_cat)` + 
                                                              `as.factor(occup_cat)` + `time_home` + `as.factor(work_affect)` + extr + open + agree + consc + neur + `log(prov_cases_pc)` + date),
                                   data = mask_interact_data,
                                   Hess= TRUE))

table_diff_pvalues <- round(pnorm(abs(coef(summary(demo_interact_prev))[, "t value"]), lower.tail = FALSE) * 2, 3) #we obtain the p-values of the difference between low and high prevalent areas from this interaction model

